In [ ]:
%load preamble_directives.py
In [2]:
from source_code_analysis.models import SoftwareProject
In [3]:
# Every SoftwareProject record; iterated further down to produce one pair of reports per project.
projects = SoftwareProject.objects.all()
The Replication Dataset contains the following report files:
- (`Benchmark_Coherence_Data.txt`, `Benchmark_Raw_Data.txt`):
  Report files containing information about the Coherence and the raw data of methods from all the 4 considered Software Systems.
- (`CoffeeMaker_Coherence_Data.txt`, `CoffeeMaker_Raw_Data.txt`):
  Report files providing the Coherence and the raw data of methods gathered from the CoffeeMaker Software System.
- (`JFreeChart060_Coherence_Data.txt`, `JFreeChart060_Raw_Data.txt`):
  Report files providing the Coherence and the raw data of methods gathered from the JFreeChart 0.6.0 Software System.
- (`JFreeChart071_Coherence_Data.txt`, `JFreeChart071_Raw_Data.txt`):
  Report files providing the Coherence and the raw data of methods gathered from the JFreeChart 0.7.1 Software System.
- (`JHotDraw741_Coherence_Data.txt`, `JHotDraw741_Raw_Data.txt`):
  Report files providing the Coherence and the raw data of methods gathered from the JHotDraw 7.4.1 Software System.

Report files providing information about the Coherence of methods are structured according to the CSV (i.e., Comma Separated Values) format. Each line of the file contains the following information:
method_id, coherence
- `method_id`: the unique identifier of the corresponding method
- `coherence`: the coherence value associated with the comment and the implementation of the referred method.

Allowed Coherence Values are: `NOT_COHERENT` and `COHERENT`.
If needed, it is straightforward to translate these values into `0` and `1`, respectively.
In [4]:
# Write Coherence Report
def write_coherence_report(coherence_report_filepath, target_methods, coherent_votes=(3, 4)):
    """Write one `method_id, coherence` line per method to a text file.

    Parameters
    ----------
    coherence_report_filepath : str
        Path of the report file; it is opened in 'w' mode, so an existing
        file is overwritten.
    target_methods : iterable
        Method objects exposing `pk` and `agreement_evaluations.all()`.
    coherent_votes : container of votes, optional
        `agreement_vote` values reported as ``COHERENT``; any other vote is
        reported as ``NOT_COHERENT``. Defaults to ``(3, 4)``.

    Raises
    ------
    IndexError
        If a method has no agreement evaluation at all.
    """
    with open(coherence_report_filepath, 'w') as coherence_report:
        for method in target_methods:
            # Only the first evaluation attached to the method is considered.
            evaluation = method.agreement_evaluations.all()[0]
            is_coherent = evaluation.agreement_vote in coherent_votes
            coherence_value = 'COHERENT' if is_coherent else 'NOT_COHERENT'
            coherence_report.write('{0}, {1}\n'.format(method.pk, coherence_value))
All the report files containing the raw data of the methods share exactly the same multiline structure. That is (for each method):
method_id, method_name, class_name, software_system
filepath, start_line, end_line
Length of the Head Comments
Head Comment
Length of the Implementation
Method Implementation
###
In [9]:
# Write Raw Data Report
def _path_after_marker(file_path, marker='extracted'):
    """Return the part of `file_path` that follows `marker` and one separator character.

    When `marker` does not occur in `file_path` the path is returned unchanged;
    slicing from the failed `find` result (-1) would silently drop leading characters.
    """
    marker_start = file_path.find(marker)
    if marker_start == -1:
        return file_path
    return file_path[marker_start + len(marker) + 1:]


def _write_counted_text(report, text):
    """Write the number of lines of `text`, then `text` itself terminated by a newline.

    Empty text produces only the `0` count line, so that an announced length of
    zero is not followed by a stray blank line.
    """
    report.write('{0}\n'.format(len(text.splitlines())))
    report.write(text)
    if text and not text.endswith('\n'):
        report.write('\n')


def write_raw_data_report(raw_report_filepath, target_methods):
    """Write the multiline raw-data record of every method to a text file.

    For each method the following lines are written:

        method_id, method_name, class_name, software_system
        filepath, start_line, end_line
        <number of lines of the head comment>
        <head comment>
        <number of lines of the implementation>
        <method implementation>
        ###

    Parameters
    ----------
    raw_report_filepath : str
        Path of the report file; it is opened in 'w' mode, so an existing
        file is overwritten.
    target_methods : iterable
        Method objects exposing `id`, `method_name`, `code_class.class_name`,
        `project.name`, `project.version`, `file_path`, `start_line`,
        `end_line`, `comment` and `code_fragment`.
    """
    with open(raw_report_filepath, 'w') as raw_report:
        for method in target_methods:
            project = method.project
            # Software system label: project name followed by the version without dots.
            software_system_name = project.name + project.version.replace('.', '')
            raw_report.write('{mid}, {method_name}, {class_name}, {software_system}\n'.format(
                mid=method.id, method_name=method.method_name,
                class_name=method.code_class.class_name,
                software_system=software_system_name))

            # The file path is reported relative to the 'extracted' folder, when present.
            raw_report.write('{filepath}, {start_line}, {end_line}\n'.format(
                filepath=_path_after_marker(method.file_path),
                start_line=method.start_line,
                end_line=method.end_line))

            _write_counted_text(raw_report, method.comment)
            _write_counted_text(raw_report, method.code_fragment)

            # Last Line of this method
            raw_report.write('###\n')
In [10]:
# File-name suffixes: report files are named '<prefix>_<suffix>' via the '{0}_{1}' formats used further down.
RAW_DATA_SUFFIX = 'Raw_Data.txt'
COHERENCE_DATA_SUFFIX = 'Coherence_Data.txt'
In [11]:
import os
# Report files go into a 'report_files' folder under the current directory;
# the folder is created only when nothing exists at that path yet.
report_folderpath = os.path.join(os.path.abspath(os.curdir), 'report_files')
if not os.path.exists(report_folderpath):
    os.makedirs(report_folderpath)
In [15]:
# Methods selected across all projects, accumulated for the benchmark-wide reports
all_methods_list = []

# -- Project-Specific Reports
for project in projects:
    software_system_name = project.name + project.version.replace('.', '')
    project_methods = project.code_methods.order_by('pk')

    # Keep only the methods evaluated as Coherent|Not Coherent:
    # wrong associations and votes equal to 2 are left out.
    target_methods = []
    for method in project_methods:
        evaluation = method.agreement_evaluations.all()[0]
        if evaluation.wrong_association or evaluation.agreement_vote == 2:
            continue
        target_methods.append(method)
    all_methods_list += target_methods

    # Coherence data of this project
    coherence_report_filename = '{0}_{1}'.format(software_system_name, COHERENCE_DATA_SUFFIX)
    coherence_report_filepath = os.path.join(report_folderpath, coherence_report_filename)
    write_coherence_report(coherence_report_filepath, target_methods)

    # Raw data of this project
    raw_report_filename = '{0}_{1}'.format(software_system_name, RAW_DATA_SUFFIX)
    raw_report_filepath = os.path.join(report_folderpath, raw_report_filename)
    write_raw_data_report(raw_report_filepath, target_methods)

# -- Entire Benchmark Reports (all the selected methods of all the projects)
# Coherence data of the whole benchmark
coherence_report_filename = '{0}_{1}'.format('Benchmark', COHERENCE_DATA_SUFFIX)
coherence_report_filepath = os.path.join(report_folderpath, coherence_report_filename)
write_coherence_report(coherence_report_filepath, all_methods_list)

# Raw data of the whole benchmark
raw_report_filename = '{0}_{1}'.format('Benchmark', RAW_DATA_SUFFIX)
raw_report_filepath = os.path.join(report_folderpath, raw_report_filename)
write_raw_data_report(raw_report_filepath, all_methods_list)
In [ ]: